Libraries
Load data
I am using the AHA dataset for writing my code because it is the
simplest dataset I can work with. Simplest = significant number of
baseline samples without issue of repeated measures, balanced
distribution of classes, likely difference to be detected present.
p_ps = readRDS("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/POMMS/R24_POMMS/data/processed/20221104_pomms_metadata_plant.rds")
p_ps
phyloseq-class experiment-level object
otu_table() OTU Table: [ 252 taxa and 384 samples ]
sample_data() Sample Data: [ 384 samples by 71 sample variables ]
tax_table() Taxonomy Table: [ 252 taxa by 8 taxonomic ranks ]
sample_data(p_ps)
Sample Data: [384 samples by 71 sample variables]:
Preprocess
Remove problematic samples
Taking out samples with less than or equal to 0 reads and keeping
only entry timepoint reduces samples from 384 down to 240.
p_ps = p_ps %>%
subset_samples( reads > 0) %>%
subset_samples(timepoint == "Entry")
p_ps
phyloseq-class experiment-level object
otu_table() OTU Table: [ 252 taxa and 240 samples ]
sample_data() Sample Data: [ 240 samples by 71 sample variables ]
tax_table() Taxonomy Table: [ 252 taxa by 8 taxonomic ranks ]
Class distribution in this dataset: Case = 193 Control = 47
sample_data(p_ps) %>%
as.data.frame() %>%
as_tibble() %>%
group_by(group) %>%
summarise(count = n())
Feature table
Attach data labels
sample_labels = p_ps@sam_data%>%
as.data.frame() %>%
as_tibble(rownames = NA) %>%
rownames_to_column("sample") %>%
select(sample, group)
features = sample_labels %>%
left_join(features)
Joining, by = "sample"
Zero variance features
No features have zero variance
nzv = features %>%
select(!c(sample,group)) %>%
nearZeroVar(saveMetrics = TRUE)
rm(nzv)
Linear dependencies
feature_ld = filtered_features %>%
select(!c(sample,group)) %>%
findLinearCombos()
filtered_features = filtered_features[, - feature_ld$remove]
Modeling
Load functions from classification_function_AA file
results = loop_label(iterations =20, input, label_list, percent = 0.7, downsampling = TRUE)
[1] "will downsample on training data"
Warning: There were missing values in resampled performance measures.
[1] "group"
[[1]]
[1] "group"
[[2]]
Reference
Prediction Case Healthy
Case 676 125
Healthy 464 155
[1] "will downsample on training data"
Warning: There were missing values in resampled performance measures.Warning: There were missing values in resampled performance measures.
[1] "group_shuffle"
[[1]]
[1] "group_shuffle"
[[2]]
Reference
Prediction Case Healthy
Case 578 142
Healthy 562 138
rm(label_list)
Plotting
AUC plot and t-test
auc_df %>%
pivot_longer(cols = c("group", "group_shuffle"), names_to = "feature", values_to = "AUC") %>%
ggplot()+
geom_boxplot(aes(x=feature, y = AUC))+
theme_classic()

t.test(auc_df$group, auc_df$group_shuffle)
Welch Two Sample t-test
data: auc_df$group and auc_df$group_shuffle
t = 3.6882, df = 37.743, p-value = 0.0007082
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.04036328 0.13863672
sample estimates:
mean of x mean of y
0.5815 0.4920
Importance plots
look-up table
#Making asv reference for names in model
lookup_asv = tax_table(p_ps) %>%
data.frame() %>%
lowest_level() %>%
rownames_to_column("asv") %>%
select(asv, name) %>%
mutate(name = make.names(make.unique(name)))
features
top 10 ranked list
top10
[1] "Helianthus.annuus" "Brassica" "Solanaceae"
[4] "Apiaceae" "Capsicum.annuum" "Poaceae"
[7] "NA." "NA.3" "Lactuca.sativa"
[10] "Zea.mays"
Importance plot
importance_df %>%
pivot_longer(cols = c("NA.":"Apium.graveolens"), names_to = "food", values_to = "importance") %>%
filter(variable == "group" & food %in% top10) %>%
ggplot()+
geom_boxplot(aes(x= reorder(food,-importance), y = importance))+
labs(x= "Food", y ="Importance") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust=1))

Some of the important NAs for predictions only map to bacterial
species in BLAST.
Important taxa directions










lookup_asv %>%
filter(name == "NA.") %>%
pull(asv)
[1] "AAATCCTGTTTTATGAAAATAAACAAGGGTTTCATAAACCGAAAATAAAAAAG"
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBMaWJyYXJpZXMKCmBgYHtyLCBpbmNsdWRlID0gRkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KG1pY3JvYmlvbWUpCmxpYnJhcnkocGh5bG9zZXEpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkocmFuZG9tRm9yZXN0KQpsaWJyYXJ5KE1CdXRpbHMpCmBgYAoKIyBMb2FkIGRhdGEKCkkgYW0gdXNpbmcgdGhlIEFIQSBkYXRhc2V0IGZvciB3cml0aW5nIG15IGNvZGUgYmVjYXVzZSBpdCBpcyB0aGUgc2ltcGxlc3QgZGF0YXNldCBJIGNhbiB3b3JrIHdpdGguClNpbXBsZXN0ID0gc2lnbmlmaWNhbnQgbnVtYmVyIG9mIGJhc2VsaW5lIHNhbXBsZXMgd2l0aG91dCBpc3N1ZSBvZiByZXBlYXRlZCBtZWFzdXJlcywgYmFsYW5jZWQgZGlzdHJpYnV0aW9uIG9mIGNsYXNzZXMsIGxpa2VseSBkaWZmZXJlbmNlIHRvIGJlIGRldGVjdGVkIHByZXNlbnQuCgoKYGBge3J9CnBfcHMgPSByZWFkUkRTKCIvVXNlcnMvYWEzNzAvTGlicmFyeS9DbG91ZFN0b3JhZ2UvQm94LUJveC9wcm9qZWN0X2RhdmlkbGFiL0xBRF9MQUJfUGVyc29ubmVsL0FtbWFyYV9BL1Byb2plY3RzL1BPTU1TL1IyNF9QT01NUy9kYXRhL3Byb2Nlc3NlZC8yMDIyMTEwNF9wb21tc19tZXRhZGF0YV9wbGFudC5yZHMiKQoKcF9wcwoKc2FtcGxlX2RhdGEocF9wcykKYGBgCgojIFByZXByb2Nlc3MKCiMjIFJlbW92ZSBwcm9ibGVtYXRpYyBzYW1wbGVzCgpUYWtpbmcgb3V0IHNhbXBsZXMgd2l0aCBsZXNzIHRoYW4gb3IgZXF1YWwgdG8gMCByZWFkcyBhbmQga2VlcGluZyBvbmx5IGVudHJ5IHRpbWVwb2ludCByZWR1Y2VzIHNhbXBsZXMgZnJvbSAzODQgZG93biB0byAyNDAuCgpgYGB7cn0KcF9wcyA9IHBfcHMgJT4lCiAgc3Vic2V0X3NhbXBsZXMoIHJlYWRzID4gMCkgJT4lCiAgc3Vic2V0X3NhbXBsZXModGltZXBvaW50ID09ICJFbnRyeSIpCgpwX3BzCmBgYAoKQ2xhc3MgZGlzdHJpYnV0aW9uIGluIHRoaXMgZGF0YXNldDoKQ2FzZSA9IDE5MwpDb250cm9sID0gNDcKCmBgYHtyfQpzYW1wbGVfZGF0YShwX3BzKSAlPiUKICBhcy5kYXRhLmZyYW1lKCkgJT4lCiAgYXNfdGliYmxlKCkgJT4lCiAgZ3JvdXBfYnkoZ3JvdXApICU+JQogIHN1bW1hcmlzZShjb3VudCA9IG4oKSkKYGBgCgojIyBEYXRhIHRyYW5zZm9ybQoKYGBge3J9Cm90dV9jbHIgPSBhYnVuZGFuY2VzKHBfcHMsICJjbHIiKSAlPiUgIyByY2xyIHRyYW5zZm9ybSBjb3VsZCBub3QgYmUgcGVyZm9ybWVkLCBnZXQgTmFucwogIHQoKQoKcF9wcyA9IHBoeWxvc2VxKG90dV90YWJsZShvdHVfY2xyLCB0YXhhX2FyZV9yb3dzID0gRkFMU0UpLAogICAgICAgICAgICAgICAgICAgc2FtcGxlX2RhdGEoc2FtcGxlX2RhdGEocF9wcykpLAogICAgICAgICAgICAgICAgICAgdGF4X3RhYmxlKHRheF90YWJsZShwX3BzKSkpCgpybShvdHVfY2xyKQpgYGAKCiMjIEZlYXR1cmUgdGFibGUKCiMjIyBFeHRyYWN0IGRhdGEKCk9yZGVyIG9mIGFzdiBjb2xuYW1lcyBpbiBPVFUgdGFibGUgYW5kIHRheCB0YWJsZSBpcyB0aGUgc2FtZS4KCmBgYHtyfQpmZWF0dXJlcyA9IHBfcHNAb3R1X3RhYmxlICU+JQogIGFzLmRhdGEuZnJhbWUoKSAlPiUKICBhc190aWJibGUocm93bmFtZXMgPSBOQSkgJT4lCiAgcm93bmFtZXNfdG9fY29sdW1uKCJzYW1wbGUiKQoKZmVhdHVyZV9sYWJlbHMgPSBjKCJzYW1wbGUiKQoKIG5hbWVzID0gdGF4X3RhYmxlKHBfcHMpICU+JQogICAgICBkYXRhLmZyYW1lKCkgJT4lCiAgICAgIGxvd2VzdF9sZXZlbCgpICU+JQogICBwdWxsKG5hbWUpCgogZmVhdHVyZV9sYWJlbHMgPSBhcHBlbmQoZmVhdHVyZV9sYWJlbHMsIG5hbWVzKQoKIGZlYXR1cmVfbGFiZWxzID0gbWFrZS51bmlxdWUoZmVhdHVyZV9sYWJlbHMpICU+JQogICBtYWtlLm5hbWVzKCkKCiBjb2xuYW1lcyhmZWF0dXJlcykgPSBmZWF0dXJlX2xhYmVscwoKZmVhdHVyZXMKYGBgCiMjIyBBdHRhY2ggZGF0YSBsYWJlbHMKCmBgYHtyfQpzYW1wbGVfbGFiZWxzID0gcF9wc0BzYW1fZGF0YSU+JQogIGFzLmRhdGEuZnJhbWUoKSAlPiUKICBhc190aWJibGUocm93bmFtZXMgPSBOQSkgJT4lCiAgcm93bmFtZXNfdG9fY29sdW1uKCJzYW1wbGUiKSAlPiUKICBzZWxlY3Qoc2FtcGxlLCBncm91cCkKCmZlYXR1cmVzID0gc2FtcGxlX2xhYmVscyAlPiUKICBsZWZ0X2pvaW4oZmVhdHVyZXMpCmBgYAoKIyMjIFplcm8gdmFyaWFuY2UgZmVhdHVyZXMKCk5vIGZlYXR1cmVzIGhhdmUgemVybyB2YXJpYW5jZQpgYGB7cn0Kbnp2ID0gZmVhdHVyZXMgJT4lCiAgc2VsZWN0KCFjKHNhbXBsZSxncm91cCkpICU+JQogIG5lYXJaZXJvVmFyKHNhdmVNZXRyaWNzID0gVFJVRSkKYGBgCgpgYGB7cn0Kcm0obnp2KQpgYGAKCiMjIyBDb3JyZWxhdGVkIHByZWRpY3RvcnMKCmBgYHtyfQpmZWF0dXJlX2NvciA9IGZlYXR1cmVzICU+JQogIHNlbGVjdCghYyhzYW1wbGUsZ3JvdXApKSAlPiUKIGNvcigpIAoKc3VtbWFyeShmZWF0dXJlX2Nvclt1cHBlci50cmkoZmVhdHVyZV9jb3IpXSkKYGBgCgpgYGB7cn0KaGlnaF9jb3IgPSBmaW5kQ29ycmVsYXRpb24oZmVhdHVyZV9jb3IsIGN1dG9mZiA9IC43NSkKCmZpbHRlcmVkX2ZlYXR1cmVzID0gZmVhdHVyZXNbLC1oaWdoX2Nvcl0KYGBgCgpgYGB7cn0Kcm0oaGlnaF9jb3IpCnJtKGZlYXR1cmVfY29yKQpgYGAKCiMjIyBMaW5lYXIgZGVwZW5kZW5jaWVzCgpgYGB7cn0KZmVhdHVyZV9sZCA9IGZpbHRlcmVkX2ZlYXR1cmVzICU+JQogIHNlbGVjdCghYyhzYW1wbGUsZ3JvdXApKSAlPiUKICBmaW5kTGluZWFyQ29tYm9zKCkKCmZpbHRlcmVkX2ZlYXR1cmVzID0gZmlsdGVyZWRfZmVhdHVyZXNbLCAtIGZlYXR1cmVfbGQkcmVtb3ZlXQpgYGAKCiMjIElucHV0IHRhYmxlCgpQcmVwcm9jZXNzaW5nIHJlZHVjZXMgY29sdW1uIG51bWJlciB0byAxMzEgZnJvbSAyMzUKCmBgYHtyfQppbnB1dCA9IGZpbHRlcmVkX2ZlYXR1cmVzICU+JQogIG11dGF0ZShncm91cF9zaHVmZmxlID0gc2FtcGxlKGdyb3VwKSAlPiUKICAgICAgICAgICBhcy5mYWN0b3IoKSwKICAgICAgICAgZ3JvdXAgPSBhcy5mYWN0b3IoZ3JvdXApKQpgYGAKCiMgTW9kZWxpbmcKCkxvYWQgZnVuY3Rpb25zIGZyb20gY2xhc3NpZmljYXRpb25fZnVuY3Rpb25fQUEgZmlsZQoKYGBge3J9CmxhYmVsX2xpc3QgPSBjKCJncm91cCIsICJncm91cF9zaHVmZmxlIikKcmVzdWx0cyA9IGxvb3BfbGFiZWwoaXRlcmF0aW9ucyA9MjAsIGlucHV0LCBsYWJlbF9saXN0LCBwZXJjZW50ID0gMC43LCBkb3duc2FtcGxpbmcgPSBUUlVFKQoKYXVjX2RmID0gcmVzdWx0c1tbMl1dCgppbXBvcnRhbmNlX2RmID0gcmVzdWx0c1tbMV1dCmBgYAoKYGBge3J9CnJtKGxhYmVsX2xpc3QpCmBgYAoKIyBQbG90dGluZwoKIyMgQVVDIHBsb3QgYW5kIHQtdGVzdApgYGB7cn0KYXVjX2RmICU+JQogIHBpdm90X2xvbmdlcihjb2xzID0gYygiZ3JvdXAiLCAiZ3JvdXBfc2h1ZmZsZSIpLCBuYW1lc190byA9ICJmZWF0dXJlIiwgdmFsdWVzX3RvID0gIkFVQyIpICU+JQogIGdncGxvdCgpKwogIGdlb21fYm94cGxvdChhZXMoeD1mZWF0dXJlLCB5ID0gQVVDKSkrCiAgdGhlbWVfY2xhc3NpYygpCgp0LnRlc3QoYXVjX2RmJGdyb3VwLCBhdWNfZGYkZ3JvdXBfc2h1ZmZsZSkKCmBgYAoKIyMgSW1wb3J0YW5jZSBwbG90cwoKIyMjIGxvb2stdXAgdGFibGUKYGBge3J9CiNNYWtpbmcgYXN2IHJlZmVyZW5jZSBmb3IgbmFtZXMgaW4gbW9kZWwKbG9va3VwX2FzdiA9IHRheF90YWJsZShwX3BzKSAlPiUgCiAgICAgZGF0YS5mcmFtZSgpICU+JQogICAgICBsb3dlc3RfbGV2ZWwoKSAlPiUKICAgcm93bmFtZXNfdG9fY29sdW1uKCJhc3YiKSAlPiUgCiAgIHNlbGVjdChhc3YsIG5hbWUpICU+JQogICBtdXRhdGUobmFtZSA9IG1ha2UubmFtZXMobWFrZS51bmlxdWUobmFtZSkpKQpgYGAKCiMjIyBzdW1tYXJpemUgaW1wb3J0YW5jZXMKYGBge3J9CiMgTWFraW5nIHN1bW1hcnkgZGYgb2YgaW1wb3J0YW5jZXMKaW1wb3J0YW5jZV9zdW1tYXJ5ID0gaW1wb3J0YW5jZV9kZiAlPiUKICBwaXZvdF9sb25nZXIoY29scyA9IGMoIk5BLiI6IkFwaXVtLmdyYXZlb2xlbnMiKSwgbmFtZXNfdG8gPSAiZm9vZCIsIHZhbHVlc190byA9ICJpbXBvcnRhbmNlIikgJT4lCiAgZ3JvdXBfYnkodmFyaWFibGUsIGZvb2QpICU+JQogIHN1bW1hcmlzZShtZWFuX2ltcG9ydGFuY2UgPSBtZWFuKGltcG9ydGFuY2UpKSAlPiUKICBhcnJhbmdlKHZhcmlhYmxlLCBkZXNjKG1lYW5faW1wb3J0YW5jZSkpCmBgYAojIyMgdG9wIDEwIHJhbmtlZCBsaXN0CmBgYHtyfQojIEdldHRpbmcgdGhlIHJhbmtlZCBsaXN0CnJhbmtpbmcgPSBpbXBvcnRhbmNlX3N1bW1hcnkgJT4lCiAgZmlsdGVyKHZhcmlhYmxlID09ICJncm91cCIpICU+JQogIHB1bGwoZm9vZCkKCnRvcDEwID0gcmFua2luZ1sxOjEwXQpgYGAKCiMjIyBJbXBvcnRhbmNlIHBsb3QKYGBge3J9CmltcG9ydGFuY2VfZGYgJT4lCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSBjKCJOQS4iOiJBcGl1bS5ncmF2ZW9sZW5zIiksIG5hbWVzX3RvID0gImZvb2QiLCB2YWx1ZXNfdG8gPSAiaW1wb3J0YW5jZSIpICAlPiUKICBmaWx0ZXIodmFyaWFibGUgPT0gImdyb3VwIiAmIGZvb2QgJWluJSB0b3AxMCkgJT4lCiAgZ2dwbG90KCkrCiAgZ2VvbV9ib3hwbG90KGFlcyh4PSByZW9yZGVyKGZvb2QsLWltcG9ydGFuY2UpLCB5ID0gaW1wb3J0YW5jZSkpKwogIGxhYnMoeD0gIkZvb2QiLCB5ID0iSW1wb3J0YW5jZSIpICsKICB0aGVtZV9jbGFzc2ljKCkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0PTEpKQpgYGAKClNvbWUgb2YgdGhlIGltcG9ydGFudCBOQXMgZm9yIHByZWRpY3Rpb25zIG9ubHkgbWFwIHRvIGJhY3RlcmlhbCBzcGVjaWVzIGluIEJMQVNULgoKIyBJbXBvcnRhbnQgdGF4YSBkaXJlY3Rpb25zCgpgYGB7cn0Kc2FtcGxlX2RhdGEocF9wcykgJT4lCiAgZGF0YS5mcmFtZSgpICU+JQogIGZpbHRlcih0aW1lcG9pbnQgPT0gIkVudHJ5IikgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21fYm94cGxvdChhZXMoeD0gZ3JvdXAsIHkgPSBibWkpKSsKICB0aGVtZV9jbGFzc2ljKCkKCnBsb3RfZm9vZF9kaXJlY3Rpb24odG9wMTAsIGlucHV0LCB2YXJpYWJsZSA9ICJncm91cCIpCmBgYAoKCmBgYHtyfQpsb29rdXBfYXN2ICU+JQogIGZpbHRlcihuYW1lID09ICJOQS4iKSAlPiUKICBwdWxsKGFzdikKYGBgCgoKYGBge3J9CmZlYXR1cmVfY29yICU+JQogIGFzLmRhdGEuZnJhbWUoKSAlPiUKICBzZWxlY3QoSGVsaWFudGh1cy5hbm51dXMpICU+JQogIGFycmFuZ2UoZGVzYyhIZWxpYW50aHVzLmFubnV1cykpCmBgYAoKCg==